# coding:utf-8
from pyspark import SparkConf, SparkContext
import os

# Point PySpark at the local JDK installation (path is environment-specific)
os.environ['JAVA_HOME'] = '/server/jdk'

if __name__ == '__main__':
    # setMaster (not a second setAppName) selects the local[*] execution mode
    conf = SparkConf().setAppName('test').setMaster('local[*]')
    sc = SparkContext(conf=conf)

    rdd1 = sc.parallelize([1, 1, 3, 3])
    rdd2 = sc.parallelize(['a', 'b', 'a'])

    rdd3 = rdd1.union(rdd2)
    print(rdd3.collect())
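    # A minimal sketch (not in the original script): union() keeps duplicates,
    # so an explicit distinct() call is needed if deduplication is wanted.
    print(rdd3.distinct().collect())  # e.g. [1, 3, 'a', 'b'] in some order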

'''
1. The union operator does not deduplicate.
2. RDDs holding different element types can also be merged.
'''